library(ggpmisc)
## Loading required package: ggpp
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggpp':
##   method                  from   
##   heightDetails.titleGrob ggplot2
##   widthDetails.titleGrob  ggplot2
## 
## Attaching package: 'ggpp'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggpp::annotate() masks ggplot2::annotate()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/zgold/Documents/GitHub/OME-EcoFOCI/EcoFOCI
library(lubridate)
library(measurements)
library(sp)
library(readr)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library("rnaturalearth")
library("rnaturalearthdata")
## 
## Attaching package: 'rnaturalearthdata'
## 
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(cmocean)
library(phyloseq)

library(tidyverse)
library(phyloseq)
library(metagMisc)
## 
## Attaching package: 'metagMisc'
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(iNEXT)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
library('phyloseq')
library(ranacapa)
library(here)
library(patchwork)

Load Data

long_COI_EcoFOCI <- readRDS(here("data","long_data","long_COI_EcoFOCI.RDS"))

Site Mapping

long_COI_EcoFOCI %>% ungroup() %>% 
  dplyr::select( Sample  ,         replicates   ,        group1   ,           
group2   ,            Sample_Name  ,        Biological_Replicate,
Technical_Replicate , Negative_control,     Cruise_ID_short  ,   
Cruise_ID_long ,      Cast_No.   ,          Rosette_position   , 
Station   ,           Sample_volume_ml ,    Time  ,              
Depth_m  ,            lat  ,                lon  ,               
Temp_C  ,             PSU_ppt  ,            OxySatPerc_percent  ,
OxyConc_umol.per.l ,  Chla_ugrams.per.l ,   PO4_umol.per.l  ,    
NO2_umol.per.l  ,     NO3_umol.per.l   ,    NH4_umol.per.l     , 
sites) %>% distinct() %>% 
  filter(., Cruise_ID_short !="SKQ23-12S") %>% 
    mutate(., Depth_bin = if_else(Depth_m >29, "Below 30m","Above 30m" )) -> sample_metadata
sample_metadata %>% 
  ggplot(aes(x=Depth_m)) +   geom_histogram(aes(x=Depth_m,y=..density..), position="identity") + 
  geom_density(aes(x=Depth_m,y=..density..))
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Temp Map

min_lat <- min(long_COI_EcoFOCI$lat, na.rm = T)
max_lat <- max(long_COI_EcoFOCI$lat, na.rm = T)

min_lon <- min(long_COI_EcoFOCI$lon, na.rm = T)
max_lon <- max(long_COI_EcoFOCI$lon, na.rm = T)

world <- ne_countries(scale = "medium", returnclass = "sf")

colnames(long_COI_EcoFOCI)
##  [1] "Run2:ASV"             "ASV_combo"            "Run3:ASV"            
##  [4] "Sequence"             "Kingdom"              "Phylum"              
##  [7] "Class"                "Order"                "Family"              
## [10] "Genus"                "Species"              "Sample"              
## [13] "nReads"               "replicates"           "group1"              
## [16] "group2"               "Sample_Name"          "Biological_Replicate"
## [19] "Technical_Replicate"  "Negative_control"     "Cruise_ID_short"     
## [22] "Cruise_ID_long"       "Cast_No."             "Rosette_position"    
## [25] "Station"              "Sample_volume_ml"     "Time"                
## [28] "Depth_m"              "lat"                  "lon"                 
## [31] "Temp_C"               "PSU_ppt"              "OxySatPerc_percent"  
## [34] "OxyConc_umol.per.l"   "Chla_ugrams.per.l"    "PO4_umol.per.l"      
## [37] "NO2_umol.per.l"       "NO3_umol.per.l"       "NH4_umol.per.l"      
## [40] "sites"                "Prop.abund"           "eDNA.Index"
sample_metadata %>% 
  dplyr::select(Station, lat, lon) %>%  
  mutate(., lat=round(lat,1),
         lon=round(lon,1)) %>% 
  distinct() %>% 
  filter(., Station %in% c("M5","M8","DBO3.4")) -> sites_to_label

library(ggrepel)
ggplot(data = world) +
    geom_sf() +
    geom_point(data = sample_metadata, aes(x = lon, y = lat, colour=Temp_C), size=2) +scale_colour_cmocean(name="thermal") +
    coord_sf(xlim = c(min_lon-1, max_lon+1), ylim = c(min_lat-1, max_lat+1), expand = FALSE) +theme_bw() +xlab("Longitude") +ylab("Latitude") + facet_wrap(Depth_bin~Cruise_ID_short)+ geom_text_repel(data = sites_to_label, aes(x = lon, y = lat, label=Station),size =3,min.segment.length = unit(0, 'lines'), nudge_x = 4, nudge_y =  0.2)

Salinity

ggplot(data = world) +
    geom_sf() +
    geom_point(data = sample_metadata, aes(x = lon, y = lat, colour=PSU_ppt), size=2) +scale_colour_cmocean(name="haline") +
    coord_sf(xlim = c(min_lon-1, max_lon+1), ylim = c(min_lat-1, max_lat+1), expand = FALSE) +theme_bw() +xlab("Longitude") +ylab("Latitude") + facet_wrap(Depth_bin~Cruise_ID_short)+ geom_text_repel(data = sites_to_label, aes(x = lon, y = lat, label=Station),size =3,min.segment.length = unit(0, 'lines'), nudge_x = 4, nudge_y =  0.2)

Chla

ggplot(data = world) +
    geom_sf() +
    geom_point(data = sample_metadata, aes(x = lon, y = lat, colour=Chla_ugrams.per.l), size=2) +scale_colour_cmocean(name="algae", limits= c(0,15)) +
    coord_sf(xlim = c(min_lon-1, max_lon+1), ylim = c(min_lat-1, max_lat+1), expand = FALSE) +theme_bw() +xlab("Longitude") +ylab("Latitude") + facet_wrap(Depth_bin~Cruise_ID_short)+ geom_text_repel(data = sites_to_label, aes(x = lon, y = lat, label=Station),size =3,min.segment.length = unit(0, 'lines'), nudge_x = 4, nudge_y =  0.2)

Oxy. Sat.

ggplot(data = world) +
    geom_sf() +
    geom_point(data = sample_metadata, aes(x = lon, y = lat, colour=OxySatPerc_percent), size=2) +scale_colour_cmocean(name="oxy") +
    coord_sf(xlim = c(min_lon-1, max_lon+1), ylim = c(min_lat-1, max_lat+1), expand = FALSE) +theme_bw() +xlab("Longitude") +ylab("Latitude") + facet_wrap(Depth_bin~Cruise_ID_short)+ geom_text_repel(data = sites_to_label, aes(x = lon, y = lat, label=Station),size =3,min.segment.length = unit(0, 'lines'), nudge_x = 4, nudge_y =  0.2)

Ammonium

ggplot(data = world) +
    geom_sf() +
    geom_point(data = sample_metadata, aes(x = lon, y = lat, colour=NH4_umol.per.l), size=2) +scale_colour_cmocean(name="haline") +
    coord_sf(xlim = c(min_lon-1, max_lon+1), ylim = c(min_lat-1, max_lat+1), expand = FALSE) +theme_bw() +xlab("Longitude") +ylab("Latitude") + facet_wrap(Depth_bin~Cruise_ID_short)+ geom_text_repel(data = sites_to_label, aes(x = lon, y = lat, label=Station),size =3,min.segment.length = unit(0, 'lines'), nudge_x = 4, nudge_y =  0.2)

long_COI_EcoFOCI %>% 
  filter(., !is.na(Species)) %>% 
  filter(., nReads >0) %>% 
  dplyr::group_by(ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  dplyr::summarise(sum_reads=sum(nReads)) %>% 
  arrange(desc(sum_reads))
## `summarise()` has grouped output by 'ASV_combo', 'Kingdom', 'Phylum', 'Class',
## 'Order', 'Family', 'Genus'. You can override using the `.groups` argument.
## # A tibble: 3,721 × 9
## # Groups:   ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus [3,721]
##    ASV_combo Kingdom   Phylum         Class Order Family Genus Species sum_reads
##    <chr>     <chr>     <chr>          <chr> <chr> <chr>  <chr> <chr>       <int>
##  1 ASVc_3    Eukaryota Nematoda       Enop… Dory… Longi… Xiph… Xiphin…   1518550
##  2 ASVc_4    Eukaryota Arthropoda     Hexa… Cala… Claus… Pseu… Pseudo…   1214342
##  3 ASVc_6    Eukaryota Arthropoda     Hexa… Cala… Acart… Acar… Acarti…    785302
##  4 ASVc_16   Eukaryota Pelagophyceae… Pela… Pela… Pelag… Aure… Aureoc…    664502
##  5 ASVc_11   Eukaryota Arthropoda     Hexa… Cala… Claus… Pseu… Pseudo…    447474
##  6 ASVc_12   Eukaryota Annelida       Poly… Spio… Spion… Prio… Priono…    414509
##  7 ASVc_13   Eukaryota Arthropoda     Hexa… Cala… Claus… Pseu… Pseudo…    413313
##  8 ASVc_20   Eukaryota Arthropoda     Hexa… Cala… Claus… Pseu… Pseudo…    245251
##  9 ASVc_50   Eukaryota Arthropoda     Hexa… Cycl… Oitho… Oith… Oithon…    200321
## 10 ASVc_34   Eukaryota Arthropoda     Hexa… Cala… Claus… Pseu… Pseudo…    157265
## # ℹ 3,711 more rows

CO1 Data Exploration

Beta Diversity

Build All Method Phyloseq Object

ASV Reads
varnames <- colnames(long_COI_EcoFOCI)
to_remove <- c("Run2:ASV","Run3:ASV","ASV_combo","Sequence","nReads","Prop.abund","eDNA.Index")


sample_metadata -> co1_sample_data

#Metadata
co1_sample_data %>%  as.data.frame() -> sampledata
rownames(sampledata) <- sampledata$Sample
sample_data(sampledata) -> sampledata

#ASV Reads

long_COI_EcoFOCI %>% 
  dplyr::select(ASV_combo,Sample, eDNA.Index) %>% 
  mutate(., eDNA.Index=as.numeric(eDNA.Index)) %>% 
  mutate(., eDNA.Index=if_else(eDNA.Index=="NaN", 0, eDNA.Index)) %>%
  pivot_wider(names_from = Sample, values_from = eDNA.Index, values_fill  =0) -> wide_PA

long_COI_EcoFOCI %>% 
  dplyr::select(ASV_combo, Kingdom,Phylum,Class, Order,Family, Genus, Species) %>% 
  distinct() %>% as.matrix() -> taxonomy_table

rownames(taxonomy_table) <- wide_PA$ASV_combo

TAX = tax_table(taxonomy_table)

wide_PA %>% 
  ungroup() %>% 
  dplyr::select(-ASV_combo) %>% as.matrix() -> otu_table
rownames(otu_table) <- wide_PA$ASV_combo

OTU = otu_table(otu_table, taxa_are_rows = TRUE)
physeq_obj._CO1 = phyloseq(OTU, TAX, sampledata)

subsurface_co1 <- subset_samples(physeq_obj._CO1, Depth_bin == "Below 30m")
surface_co1 <- subset_samples(physeq_obj._CO1, Depth_bin == "Above 30m")

PERMANOVA

surface_co1 Jaccard

surface_co1_c = subset_samples(surface_co1, !is.na(Temp_C) & !is.na(lat) & !is.na(Depth_m))

 

#Betadiversity
#Generate Vegan formatted data table
method.sampledf <- data.frame(sample_data(surface_co1_c))
method.rel_abun<- vegan_otu(surface_co1_c)

#Jaccard dissimilarity matrix
method.d_carn <- vegdist(method.rel_abun, method="bray", binary=TRUE)

method.sampledf$Cruise_ID_short %>%  unique()
## [1] "SKQ2021" "DY2012"  "NO20"
#PERMANOVA: Method+Site

method.adonis_results <- adonis2(method.rel_abun~ Depth_m+lat+Temp_C+PSU_ppt+OxySatPerc_percent+OxyConc_umol.per.l+Chla_ugrams.per.l+PO4_umol.per.l+NO2_umol.per.l+NO3_umol.per.l+NH4_umol.per.l
, data=method.sampledf,  na.action =na.exclude, by = "margin")

method.adonis_results
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = method.rel_abun ~ Depth_m + lat + Temp_C + PSU_ppt + OxySatPerc_percent + OxyConc_umol.per.l + Chla_ugrams.per.l + PO4_umol.per.l + NO2_umol.per.l + NO3_umol.per.l + NH4_umol.per.l, data = method.sampledf, by = "margin", na.action = na.exclude)
##                    Df SumOfSqs      R2      F Pr(>F)   
## Depth_m             1   0.4095 0.03663 1.1561  0.096 . 
## lat                 1   0.5228 0.04676 1.4758  0.007 **
## Temp_C              1   0.3844 0.03438 1.0852  0.193   
## PSU_ppt             1   0.3837 0.03432 1.0834  0.203   
## OxySatPerc_percent  1   0.3820 0.03416 1.0784  0.210   
## OxyConc_umol.per.l  1   0.3810 0.03408 1.0756  0.216   
## Chla_ugrams.per.l   1   0.4054 0.03626 1.1444  0.114   
## PO4_umol.per.l      1   0.3920 0.03506 1.1066  0.166   
## NO2_umol.per.l      1   0.4470 0.03998 1.2619  0.032 * 
## NO3_umol.per.l      1   0.3849 0.03443 1.0866  0.200   
## NH4_umol.per.l      1   0.4179 0.03738 1.1799  0.080 . 
## Residual           15   5.3132 0.47522                 
## Total              26  11.1804 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
method.adonis_results_simple <- adonis2(method.rel_abun~Depth_m+lat+Temp_C+PSU_ppt+OxyConc_umol.per.l+Chla_ugrams.per.l+lon, data=method.sampledf, by = "terms")

method.adonis_results_simple
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = method.rel_abun ~ Depth_m + lat + Temp_C + PSU_ppt + OxyConc_umol.per.l + Chla_ugrams.per.l + lon, data = method.sampledf, by = "terms")
##                     Df SumOfSqs      R2      F Pr(>F)    
## Depth_m              1    1.369 0.01926 3.3638  0.001 ***
## lat                  1    2.636 0.03708 6.4752  0.001 ***
## Temp_C               1    1.638 0.02304 4.0243  0.001 ***
## PSU_ppt              1    1.153 0.01622 2.8333  0.001 ***
## OxyConc_umol.per.l   1    1.377 0.01937 3.3829  0.001 ***
## Chla_ugrams.per.l    1    0.798 0.01123 1.9614  0.001 ***
## lon                  1    1.057 0.01487 2.5967  0.001 ***
## Residual           150   61.059 0.85892                  
## Total              157   71.088 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(method.adonis_results_simple)
##        Df            SumOfSqs             R2                F        
##  Min.   :  1.00   Min.   : 0.7984   Min.   :0.01123   Min.   :1.961  
##  1st Qu.:  1.00   1st Qu.: 1.1533   1st Qu.:0.01622   1st Qu.:2.715  
##  Median :  1.00   Median : 1.3770   Median :0.01937   Median :3.364  
##  Mean   : 34.89   Mean   :15.7973   Mean   :0.22222   Mean   :3.520  
##  3rd Qu.:  1.00   3rd Qu.: 2.6358   3rd Qu.:0.03708   3rd Qu.:3.704  
##  Max.   :157.00   Max.   :71.0877   Max.   :1.00000   Max.   :6.475  
##                                                       NA's   :2      
##      Pr(>F)     
##  Min.   :0.001  
##  1st Qu.:0.001  
##  Median :0.001  
##  Mean   :0.001  
##  3rd Qu.:0.001  
##  Max.   :0.001  
##  NA's   :2

surface_co1 Bray

surface_co1_c = subset_samples(surface_co1, !is.na(Temp_C) & !is.na(lat) & !is.na(Depth_m))

 

#Betadiversity
#Generate Vegan formatted data table
method.sampledf <- data.frame(sample_data(surface_co1_c))
method.rel_abun<- vegan_otu(surface_co1_c)

#Jaccard dissimilarity matrix
method.d_carn <- vegdist(method.rel_abun, method="bray")

#PERMANOVA: Method+Site

method.adonis_results <- adonis2(method.rel_abun~ Depth_m+lat+Temp_C+PSU_ppt+OxySatPerc_percent+OxyConc_umol.per.l+Chla_ugrams.per.l+PO4_umol.per.l+NO2_umol.per.l+NO3_umol.per.l+NH4_umol.per.l
, data=method.sampledf,  na.action =na.exclude, by = "margin")

method.adonis_results
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = method.rel_abun ~ Depth_m + lat + Temp_C + PSU_ppt + OxySatPerc_percent + OxyConc_umol.per.l + Chla_ugrams.per.l + PO4_umol.per.l + NO2_umol.per.l + NO3_umol.per.l + NH4_umol.per.l, data = method.sampledf, by = "margin", na.action = na.exclude)
##                    Df SumOfSqs      R2      F Pr(>F)   
## Depth_m             1   0.4095 0.03663 1.1561  0.119   
## lat                 1   0.5228 0.04676 1.4758  0.003 **
## Temp_C              1   0.3844 0.03438 1.0852  0.215   
## PSU_ppt             1   0.3837 0.03432 1.0834  0.226   
## OxySatPerc_percent  1   0.3820 0.03416 1.0784  0.231   
## OxyConc_umol.per.l  1   0.3810 0.03408 1.0756  0.242   
## Chla_ugrams.per.l   1   0.4054 0.03626 1.1444  0.115   
## PO4_umol.per.l      1   0.3920 0.03506 1.1066  0.162   
## NO2_umol.per.l      1   0.4470 0.03998 1.2619  0.038 * 
## NO3_umol.per.l      1   0.3849 0.03443 1.0866  0.218   
## NH4_umol.per.l      1   0.4179 0.03738 1.1799  0.082 . 
## Residual           15   5.3132 0.47522                 
## Total              26  11.1804 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
method.adonis_results_simple <- adonis2(method.rel_abun~Depth_m+lat+Temp_C+PSU_ppt+OxyConc_umol.per.l+Chla_ugrams.per.l+lon, data=method.sampledf, by = "terms")

method.adonis_results_simple
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = method.rel_abun ~ Depth_m + lat + Temp_C + PSU_ppt + OxyConc_umol.per.l + Chla_ugrams.per.l + lon, data = method.sampledf, by = "terms")
##                     Df SumOfSqs      R2      F Pr(>F)    
## Depth_m              1    1.369 0.01926 3.3638  0.001 ***
## lat                  1    2.636 0.03708 6.4752  0.001 ***
## Temp_C               1    1.638 0.02304 4.0243  0.001 ***
## PSU_ppt              1    1.153 0.01622 2.8333  0.001 ***
## OxyConc_umol.per.l   1    1.377 0.01937 3.3829  0.001 ***
## Chla_ugrams.per.l    1    0.798 0.01123 1.9614  0.001 ***
## lon                  1    1.057 0.01487 2.5967  0.001 ***
## Residual           150   61.059 0.85892                  
## Total              157   71.088 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(method.adonis_results_simple)
##        Df            SumOfSqs             R2                F        
##  Min.   :  1.00   Min.   : 0.7984   Min.   :0.01123   Min.   :1.961  
##  1st Qu.:  1.00   1st Qu.: 1.1533   1st Qu.:0.01622   1st Qu.:2.715  
##  Median :  1.00   Median : 1.3770   Median :0.01937   Median :3.364  
##  Mean   : 34.89   Mean   :15.7973   Mean   :0.22222   Mean   :3.520  
##  3rd Qu.:  1.00   3rd Qu.: 2.6358   3rd Qu.:0.03708   3rd Qu.:3.704  
##  Max.   :157.00   Max.   :71.0877   Max.   :1.00000   Max.   :6.475  
##                                                       NA's   :2      
##      Pr(>F)     
##  Min.   :0.001  
##  1st Qu.:0.001  
##  Median :0.001  
##  Mean   :0.001  
##  3rd Qu.:0.001  
##  Max.   :0.001  
##  NA's   :2
ord <- ordinate(surface_co1_c, method = "NMDS", distance = ("bray"))
## Run 0 stress 0.2084523 
## Run 1 stress 0.2054635 
## ... New best solution
## ... Procrustes: rmse 0.01914398  max resid 0.2141554 
## Run 2 stress 0.2106911 
## Run 3 stress 0.2172672 
## Run 4 stress 0.2158506 
## Run 5 stress 0.2101274 
## Run 6 stress 0.2068392 
## Run 7 stress 0.2066796 
## Run 8 stress 0.2065959 
## Run 9 stress 0.2115227 
## Run 10 stress 0.2173828 
## Run 11 stress 0.2115354 
## Run 12 stress 0.2099896 
## Run 13 stress 0.2107259 
## Run 14 stress 0.20774 
## Run 15 stress 0.2093568 
## Run 16 stress 0.2087188 
## Run 17 stress 0.2062363 
## Run 18 stress 0.2055069 
## ... Procrustes: rmse 0.007296023  max resid 0.08387023 
## Run 19 stress 0.2055085 
## ... Procrustes: rmse 0.007281217  max resid 0.08383755 
## Run 20 stress 0.212606 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     18: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin

NMDS Temp

##Plot_Ordination
plot_ordination(surface_co1_c, ord, "samples", color = "Temp_C") +
  ggtitle("NMDS - Stress 0.2051166") + 
  geom_point(size = 4) + 
  theme_bw()+ 
  theme(plot.title = element_text(hjust = 0.5,size = 20, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 18, face = "bold"),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), 
        axis.text=element_text(size=16), 
        axis.title=element_text(size=16,face="bold"),
        legend.title = element_text( size=16, face="bold"),
        legend.text = element_text(size=14)
        ) + 
  labs(color = "Temp ËšC") +scale_colour_cmocean() 

NMDS Cruise

##Plot_Ordination
plot_ordination(surface_co1_c, ord, "samples", color = "Cruise_ID_short") +
  ggtitle("NMDS - Stress 0.2051166") + 
  geom_point(size = 4) + 
  theme_bw()+ 
  theme(plot.title = element_text(hjust = 0.5,size = 20, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 18, face = "bold"),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), 
        axis.text=element_text(size=16), 
        axis.title=element_text(size=16,face="bold"),
        legend.title = element_text( size=16, face="bold"),
        legend.text = element_text(size=14)
        ) + 
  labs(color = "Cruise")

subsurface Bray

#Betadiversity
#Generate Vegan formatted data table
method.sampledf <- data.frame(sample_data(subsurface_co1))
method.rel_abun<- vegan_otu(subsurface_co1)

#Jaccard dissimilarity matrix
method.d_carn <- vegdist(method.rel_abun, method="bray")

method.adonis_results <- adonis2(method.rel_abun~ Depth_m+lat+Temp_C+PSU_ppt+OxySatPerc_percent+OxyConc_umol.per.l+Chla_ugrams.per.l+PO4_umol.per.l+NO2_umol.per.l+NO3_umol.per.l+NH4_umol.per.l
, data=method.sampledf,  na.action =na.exclude, by = "margin")

method.adonis_results
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = method.rel_abun ~ Depth_m + lat + Temp_C + PSU_ppt + OxySatPerc_percent + OxyConc_umol.per.l + Chla_ugrams.per.l + PO4_umol.per.l + NO2_umol.per.l + NO3_umol.per.l + NH4_umol.per.l, data = method.sampledf, by = "margin", na.action = na.exclude)
##                    Df SumOfSqs      R2      F Pr(>F)    
## Depth_m             1   0.4191 0.01467 1.0389  0.294    
## lat                 1   0.7315 0.02560 1.8134  0.001 ***
## Temp_C              1   0.5510 0.01929 1.3659  0.003 ** 
## PSU_ppt             1   0.6983 0.02444 1.7311  0.001 ***
## OxySatPerc_percent  1   0.5330 0.01866 1.3213  0.004 ** 
## OxyConc_umol.per.l  1   0.5425 0.01899 1.3449  0.003 ** 
## Chla_ugrams.per.l   1   0.6360 0.02226 1.5766  0.001 ***
## PO4_umol.per.l      1   0.5341 0.01870 1.3241  0.007 ** 
## NO2_umol.per.l      1   0.5832 0.02042 1.4458  0.001 ***
## NO3_umol.per.l      1   0.5612 0.01964 1.3913  0.002 ** 
## NH4_umol.per.l      1   0.5634 0.01972 1.3966  0.002 ** 
## Residual           52  20.9761 0.73423                  
## Total              63  28.5688 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ord <- ordinate(subsurface_co1, method = "NMDS", distance = ("bray"))
## Run 0 stress 0.1813475 
## Run 1 stress 0.1919913 
## Run 2 stress 0.1909429 
## Run 3 stress 0.1971168 
## Run 4 stress 0.1957501 
## Run 5 stress 0.1987125 
## Run 6 stress 0.2013363 
## Run 7 stress 0.2112233 
## Run 8 stress 0.1924967 
## Run 9 stress 0.1913625 
## Run 10 stress 0.189341 
## Run 11 stress 0.2066547 
## Run 12 stress 0.1969571 
## Run 13 stress 0.2055974 
## Run 14 stress 0.1813623 
## ... Procrustes: rmse 0.007535433  max resid 0.071869 
## Run 15 stress 0.1909005 
## Run 16 stress 0.1814326 
## ... Procrustes: rmse 0.003412462  max resid 0.03974516 
## Run 17 stress 0.2040004 
## Run 18 stress 0.2051882 
## Run 19 stress 0.1918378 
## Run 20 stress 0.2111891 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     19: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin
##Plot_Ordination
plot_ordination(subsurface_co1, ord, "samples", color = "Cruise_ID_short") +
  ggtitle("NMDS - Stress 0.2051166") + 
  geom_point(size = 4) + 
  theme_bw()+ 
  theme(plot.title = element_text(hjust = 0.5,size = 20, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 18, face = "bold"),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), 
        axis.text=element_text(size=16), 
        axis.title=element_text(size=16,face="bold"),
        legend.title = element_text( size=16, face="bold"),
        legend.text = element_text(size=14)
        ) + 
  labs(color = "Cruise")

## Calanus

long_COI_EcoFOCI %>% 
  dplyr::select(ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  distinct() %>% 
  filter(., Genus=="Calanus")
## # A tibble: 63 × 8
## # Groups:   ASV_combo [63]
##    ASV_combo Kingdom   Phylum     Class       Order     Family    Genus  Species
##    <chr>     <chr>     <chr>      <chr>       <chr>     <chr>     <chr>  <chr>  
##  1 ASVc_436  Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… Calanu…
##  2 ASVc_1190 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… Calanu…
##  3 ASVc_1205 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
##  4 ASVc_1363 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
##  5 ASVc_2058 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
##  6 ASVc_2159 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
##  7 ASVc_2198 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… Calanu…
##  8 ASVc_2280 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… Calanu…
##  9 ASVc_2425 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
## 10 ASVc_2696 Eukaryota Arthropoda Hexanauplia Calanoida Calanidae Calan… <NA>   
## # ℹ 53 more rows
long_COI_EcoFOCI %>% 
  filter(., Genus=="Calanus") %>% 
  dplyr::group_by(ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  dplyr::summarise(sum_reads=sum(nReads)) %>% 
  arrange(desc(sum_reads))
## `summarise()` has grouped output by 'ASV_combo', 'Kingdom', 'Phylum', 'Class',
## 'Order', 'Family', 'Genus'. You can override using the `.groups` argument.
## # A tibble: 63 × 9
## # Groups:   ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus [63]
##    ASV_combo Kingdom   Phylum     Class     Order Family Genus Species sum_reads
##    <chr>     <chr>     <chr>      <chr>     <chr> <chr>  <chr> <chr>       <int>
##  1 ASVc_2280 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       782
##  2 ASVc_2425 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… <NA>          719
##  3 ASVc_1363 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… <NA>          660
##  4 ASVc_2861 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       563
##  5 ASVc_4743 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       254
##  6 ASVc_4903 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       242
##  7 ASVc_7102 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       237
##  8 ASVc_4545 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       229
##  9 ASVc_4627 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… Calanu…       223
## 10 ASVc_2058 Eukaryota Arthropoda Hexanaup… Cala… Calan… Cala… <NA>          200
## # ℹ 53 more rows

Pseudocalanus

long_COI_EcoFOCI %>% 
  dplyr::select(ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  distinct() %>% 
  filter(., Genus=="Pseudocalanus")
## # A tibble: 1,992 × 8
## # Groups:   ASV_combo [1,992]
##    ASV_combo Kingdom   Phylum     Class       Order     Family     Genus Species
##    <chr>     <chr>     <chr>      <chr>       <chr>     <chr>      <chr> <chr>  
##  1 ASVc_4    Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  2 ASVc_11   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  3 ASVc_13   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  4 ASVc_20   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  5 ASVc_29   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  6 ASVc_34   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  7 ASVc_35   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  8 ASVc_60   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
##  9 ASVc_82   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
## 10 ASVc_87   Eukaryota Arthropoda Hexanauplia Calanoida Clausocal… Pseu… Pseudo…
## # ℹ 1,982 more rows
long_COI_EcoFOCI %>% 
  filter(., Genus=="Pseudocalanus") %>% 
  dplyr::group_by(ASV_combo, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>%
  dplyr::summarise(sum_reads=sum(nReads)) %>% 
  arrange(desc(sum_reads)) -> Pseudocalanus_tot
## `summarise()` has grouped output by 'ASV_combo', 'Kingdom', 'Phylum', 'Class',
## 'Order', 'Family', 'Genus'. You can override using the `.groups` argument.
Pseudocalanus_tot %>% 
  filter(., sum_reads > 5000) -> Pseudocalanus_to_keep
  # Pseudocalanus mimus versus Pseudocalanus minutus    
c(setdiff(varnames,to_remove),"Tot") -> varnames_to_group

long_COI_EcoFOCI %>% 
 group_by(Sample) %>%
    mutate(Tot = sum(nReads)) %>% 
    filter(., Genus=="Pseudocalanus") %>% 
    filter(., !is.na(Species)) %>% 
    group_by(!!!syms(varnames_to_group)) %>% 
    dplyr::summarise(nReads=sum(nReads)) %>% 
    mutate (Prop.abund = nReads / Tot) %>% 
      ungroup() %>% 
    group_by (Species) %>%
    mutate (Colmax = max(Prop.abund),
            eDNA.Index = Prop.abund / Colmax) %>% 
  mutate(., Type = case_when(Species == "Pseudocalanus mimus" ~ "Temperate",
                             Species == "Pseudocalanus newmani"~ "Temperate",
                             Species ==  "Pseudocalanus minutus" ~"Arctic",
                             Species ==  "Pseudocalanus acuspes" ~"Arctic",
                             Species ==  "Pseudocalanus moultoni" ~"Temperate",
                             TRUE ~"Unknown"))-> Pseudocalanus_Df
## `summarise()` has grouped output by 'Kingdom', 'Phylum', 'Class', 'Order',
## 'Family', 'Genus', 'Species', 'Sample', 'replicates', 'group1', 'group2',
## 'Sample_Name', 'Biological_Replicate', 'Technical_Replicate',
## 'Negative_control', 'Cruise_ID_short', 'Cruise_ID_long', 'Cast_No.',
## 'Rosette_position', 'Station', 'Sample_volume_ml', 'Time', 'Depth_m', 'lat',
## 'lon', 'Temp_C', 'PSU_ppt', 'OxySatPerc_percent', 'OxyConc_umol.per.l',
## 'Chla_ugrams.per.l', 'PO4_umol.per.l', 'NO2_umol.per.l', 'NO3_umol.per.l',
## 'NH4_umol.per.l', 'sites'. You can override using the `.groups` argument.
Pseudocalanus_Df %>% 
  filter(., Species =="Pseudocalanus mimus") %>% 
  ggplot(., aes(y=eDNA.Index, x=Temp_C)) +geom_point() + facet_wrap(Species~.) +
  stat_poly_line() +
  stat_poly_eq() +
  geom_point()
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_poly_line()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  ggplot(., aes(y=log10(nReads), x=Temp_C, color=Cruise_ID_short)) +geom_point() + facet_wrap(Species~Type)
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads >0, 1,0)) %>% 
  ggplot(., aes(y=PA, x=Temp_C)) +geom_point() + facet_wrap(Species~Type) +
  geom_smooth(method = "glm", 
    method.args = list(family = "binomial"), 
    se = FALSE) 
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads >0, 1,0)) %>% 
  mutate(., Depth_bin = if_else(Depth_m >29, "bottom","surface" )) %>% 
  ggplot(., aes(y=lat, x=Temp_C, colour = PA)) +geom_point() + facet_wrap(Species~Type~Depth_bin)
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads >0, 1,0)) %>% 
  ggplot(., aes(y=log10(Chla_ugrams.per.l), x=Temp_C, colour = eDNA.Index)) +geom_point() + facet_wrap(Species~Type) +    scale_color_gradientn(colours = c("black", "#C6DBEF", 'dodgerblue4'),
                       values = c(0, .Machine$double.eps, 1))
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads >0, 1,0)) %>% 
  mutate(., Time2 = as.POSIXct(Time)) %>% 
  ggplot(., aes(x=Time2, y=Temp_C, colour = eDNA.Index)) +geom_point() + facet_wrap(Species~Type) +    scale_color_gradientn(colours = c("grey80", "lightyellow", 'red'),
                       values = c(0, .Machine$double.eps, 1)) +theme_bw()
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads >0, 1,0)) %>% 
  filter(., PA >0) %>% 
  ggplot(., aes(, x=Temp_C)) +geom_density() + facet_wrap(Species~Type)
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

Pseudocalanus_Df %>% 
  mutate(., PA = if_else(nReads > 0, "Present","Absent")) %>% 
  mutate(., Temp_bin = if_else(Temp_C > 2, "Warm","Cold")) %>% 
  group_by(Species, Temp_bin, PA) %>% 
  dplyr::summarise(N=n()) %>% 
  mutate(freq = N / sum(N)) %>% 
  filter(., !is.na(Temp_bin)) %>% 
  mutate(., Type = case_when(Species == "Pseudocalanus mimus" ~ "Temperate",
                             Species == "Pseudocalanus newmani"~ "Temperate",
                             Species ==  "Pseudocalanus minutus" ~"Arctic",
                             Species ==  "Pseudocalanus acuspes" ~"Arctic",
                             Species ==  "Pseudocalanus moultoni" ~"Temperate",
                             TRUE ~"Unknown")) %>% 
ggplot(., aes(x=PA, y =freq)) + geom_col() + facet_wrap(Species~Type ~Temp_bin)
## `summarise()` has grouped output by 'Species', 'Temp_bin'. You can override
## using the `.groups` argument.